* D:\E\replications\BJPS2020\rawdata\silc\yale_eu-silc_july2019.do

	// Perhaps useful: https://www.gesis.org/en/missy/metadata/EU-SILC/

* cd D:\E\replications\BJPS2020\
cap log close
log using ./rawdata/silc/yale_eu-silc_july2019_data.txt, text replace
display "$S_TIME  $S_DATE"
about
ado dir

* Convert CSV to DTA
// Longitudinal files (cd "D:/E/eu_silc2017/Long/")
set more off
clear all
cd ./rawdata/silc/licensed/long/
* foreach c in AT BE BG CH CY CZ DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK {
foreach c in AT BE CH DK EL ES FI FR IE IS IT LU NL NO PT SE UK {
	cd ./`c'/
	folders
	foreach y in `r(folders)' {
		cd ./`y'/
		di "** `c' `y' **"
		fs *.csv
		foreach f in `r(files)' {
			insheet using "`f'", comma clear
			gen file="`f'"
			local t=substr("`f'",10,1)
			di "`t'"
			gen record="`t'"
			gen wave=`y'
			note: "/rawdata/silc/licensed/long/`c'/`y'/`f'"
			compress
			tempfile t_`t'
			save `t_`t'', replace
		}
		cd ..
		foreach t in d h p r {
			use `t_`t'', clear
			compress
			cap noisily destring `t'b100, replace
			saveold `c'_`t'_`y'.dta, replace
		}
	}
	cd ..
}

* Build Stata datasets
	* D: HOUSEHOLD REGISTER (D-FILE)
	* R: PERSONAL REGISTER (R-FILE)
	* H: HOUSEHOLD DATA (H-FILE)
	* P: PERSONAL DATA (P-FILE)
// Longitudinal files
set more off
clear all

*foreach c in AT BE BG CH CY CZ DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK {
foreach c in AT BE CH DK EL ES FI FR IE IS IT LU NL NO PT SE UK {
	foreach d in p h r {
		cd ./`c'/
		di "** `d' `c'**"
		clear		
		fs *_`d'_*.dta
		quietly append using `r(files)', force
		cd ..
		gen country="`c'"
		compress
		saveold `d'_`c'.dta, replace
	}
}

foreach v in p h r { 
	clear
	fs `v'_*.dta
	quietly append using `r(files)', force
	saveold `v', replace
}

* ./rawdata/silc/licensed/long/
cd ..
cd ..
cd ..
cd ..

use ./rawdata/silc/licensed/long/p.dta, clear
clonevar rb010=pb010 // year of the survey
clonevar rb020=pb020 // country
clonevar rb030=pb030 // personal id
clonevar rb040=px030 // hid
cap noisily isid rb010 rb020 rb030
bys rb010 rb020 rb030 (wave): gen N=_N
tab pb020 N, m
isid wave rb010 rb020 rb030
drop N
cap drop _merge
merge 1:1 wave rb010 rb020 rb030 rb040 using ./rawdata/silc/licensed/long/r.dta
drop if _merge==2
rename _merge p_r_merge
isid wave country pb010 pb030 
	
clonevar hb010=pb010 // survey year
clonevar hb030=px030 // hid
isid country wave hb010 pb030
merge m:1 wave country hb010 hb030 using ./rawdata/silc/licensed/long/h.dta
rename _merge pr_h_merge
drop if pr_h_merge==2

clonevar pid=pb030 // pid
clonevar hid=hb030 // hid

clonevar year=pb010
isid wave country year pid

cap noisily do ./rawdata/silc/eu-silc_var-labels.do
label var pb050 "Personal base weight"
label var pb080 "Personal base weight for selected respondent (register countries only)"
* see http://www.stat.fi/eusilc/ws_3-1_museux.pdf
	
// identify duplicates to down-weight them later
cap drop N
bys pb010 pb020 pb030 pb140 pb150 pb160 pb170 (wave): gen dupl=_N
label var dupl "# of likely duplicate observations"
tab pb020 dupl, m
d pb010 pb020 pb030 pb140 pb150 pb160 pb170 wave

egen hhid=group(wave hb020 hb030)
label var hhid "Unique household ID (constructed)"
rename pid org_pid
egen pid=group(wave pb020 pb030) 
label var pid "Unique person ID (constructed)"

isid pid year
xtset pid year, yearly

bys pid: gen N_obs=_N
tab N_obs
tab country N_obs, m

clonevar month=rb070
clonevar yob=rb080

* Weight
clonevar weight=pb050
replace weight=weight/dupl
	// account for duplicates
	// replace weight=1 if ccode==350 | ccode==375 | ccode==205 // see "H:\MainLargeFileStore\Rehm\Data\EU_SILC\data_harvest\2006\Documentation\SILC L-2006 UDB PROBLEMS.xls". The weights are incorrect in these countries
* Alternative weights
	// RB060 - Personal base weight
	// RB062 - Longitudinal weight (two-year duration)
	// RB063 - Longitudinal weight (three-year duration)
	// RB064 - Longitudinal weight (four-year duration) 

gen female=(pb150==2) if pb150<.
rename px020 age // PX020 - Age at the end of the income reference period 

cap drop ccode
kountry hb020, from(iso2c) to(cown)
rename _COWN_ ccode
replace ccode=200 if hb020=="UK"
replace ccode=350 if hb020=="EL"
kountry ccode, from(cown) to(iso3c)
rename _ISO3C_ iso3c
labmask ccode, val(iso3c)
numlabel ccode, add

xtset pid year, yearly
compress
note: .\rawdata\silc\yale_eu-silc_july2019.do
note: Created on `= c(current_date)'

saveold ./rawdata/silc/licensed/eu-silc.dta, replace

display "$S_TIME  $S_DATE"
cap log close

****************
* Calculate ESIs
****************

cap log close
log using ./rawdata/silc/yale_eu-silc_july2019_ESIs.txt, text replace
display "$S_TIME  $S_DATE"
about
ado dir

set more off

use year pid hhid ccode iso3c weight female age hx040 ///
	hx010 px010 ///
	rb250 pb050 pb080 pb150 py010g py021g py050g py080g py090g py100g py110g py120g py130g py140g *wave *file ///
	hb010 hb020 hb030 hy010 hy010_f hy010_i hy020 hy020_f hy020_i hy022 hy022_f hy022_i hy023 hy023_f hy023_i hy025 hy025_f hy040g hy090g hy130g hy050g hy060g hy070g hy080g hy110g hy120g hy130g hy140g hx040 hx050 hx090 hx100 ///
	pe020 pl110 pe040* pl050 pl051 pb190 pl030 pl031 pl020 pl025 pl040 ///
	pl060 pl140 pl160 pl170 pl180 ph010 ph020 ph030 ///
	pb030 rb070 rb080 yob month ///
	using ./rawdata/silc/licensed/eu-silc.dta, clear

* CPI (Eurostat)
cap drop cpi*
drop year 
gen year=hb010
replace year=year-1 if !inlist(ccode,200,205)
cap drop _merge
merge m:1 ccode year using ./rawdata/silc/cpi_eurostat.dta
drop if _merge==2
drop _merge
// Data for Serbia: http://data.worldbank.org/indicator/FP.CPI.TOTL?end=2015&locations=RS&start=2010
replace cpi2010=111 if year==2011 & ccode==345 
replace cpi2010=119 if year==2012 & ccode==345 
replace cpi2010=128 if year==2013 & ccode==345 
replace cpi2010=131 if year==2014 & ccode==345
replace cpi2010=133 if year==2015 & ccode==345
drop year 
gen year=hb010

gen version="Dec 4, 2019"
	
// "All variables are in € (EURO). For the countries not members of the euro area the conversion factor can be found in variables HX010 and PX010.
// Income data (euro) i. e. HY020 * HX010 = income data (national currency)."
// C:\Dropbox\Rehm\Data\EU-SILC\yale\documentation_jan2016\L-2013 DIFFERENCES BETWEEN DATACOLLECTED AND UDB 15-08-01.doc
// convert countries that DO NOT HAVE the Euroe at any point into NCU:

ds hy??? hy???? py????
di "`r(varlist)'"
local n "`r(varlist)'"
local m: list uniq n
foreach v in `m' {
	replace `v'=`v'*(hx010) if inlist(ccode,200,225,290,310,316,343,344,345,355,360,367,368,380,385,390,395,640)
	// these countries switch from NCU to Euros: inlist(ccode,317,338,349,352)
}

* HY020 = HY010 – HY120G – HY130G – HY140G. See p. 218 in C:\Dropbox\Rehm\Data\EU-SILC\yale\documentation\Guidelines Doc65_2011.pdf

// these are very likely wrong in terms of income variables:
drop if ccode==212 & year==2002
drop if ccode==230 & year==2003
drop if ccode==230 & year==2004 & wave==2005
drop if ccode==220 & year<=2004
drop if ccode==235 & year<=2005
drop if ccode==325 & year<=2005
drop if ccode==350 & year<=2005
drop if ccode==367 & year<=2005
	
label define rb250 ///
	/// Information or interview completed
	11 "information completed only from interview" /// 
	12 "information completed only from registers" /// 
	13 "information completed from both: interview and registers" /// 
	14 "information completed from full-record imputation" /// 
	/// Interview not completed, though contact made
	21 "individual unable to respond (illness, incapacity, etc) and no proxy possible" /// 
	22 "failed to return self-completed questionnaire" /// 
	23 "refusal to co-operate" /// 
	/// Individual not contacted because
	31 "person temporarily away and no proxy possible" /// 
	32 "no contact for other reasons" /// 
	/// Information or interview not completed
	33 "information not completed: reason unknown", modify
label val rb250 rb250

* income concepts
cap drop DI MI
clonevar DI=hy020
label var DI "HH-level disposable income (EU-SILC's hy020)"
clonevar MI=hy023
label var MI "HH-level market income (EU-SILC's hy023)"

* Market income 'by hand'
egen tmp_mi_p=rowtotal(py010g py050g py080g)
bys hhid year: egen tmp_mi_h1=total(tmp_mi_p)
egen tmp_mi_h2=rowtotal(hy040g hy080g hy090g hy110g)
gen mi=tmp_mi_h1+tmp_mi_h2
replace mi=mi-hy130g if hy130g<.

* Transfer income 'by hand'
egen tmp_ti_p=rowtotal(py090g py100g py110g py120g py130g py140g)
bys hhid year: egen tmp_ti_h1=total(tmp_ti_p)
egen tmp_ti_h2=rowtotal(hy050g hy060g hy070g)
gen ti=tmp_ti_h1+tmp_ti_h2

* Taxes 'by hand'
egen tx=rowtotal(hy120g hy130g hy140g) 

* Gross income GI 'by hand'
egen gi=rowtotal(mi ti)
label var gi "Gross income, hh-level"

* Disposable income DI 'by hand'
gen di=gi-tx
label var di "Disposable income, hh-level"

replace di=. if DI==.
replace mi=. if MI==.

tabstat DI MI di mi if ccode==395, by(year) s(p50) f(%9.0fc)

* equivalize
clonevar HHsize=hx040
gen equivalence=sqrt(HHsize)
label var equivalence "Equivalence scale: sqrt(HHsize)"

set more off 
foreach v in mi di MI DI {
	cap drop `v'_e
	gen `v'_e=`v'/equivalence if `v'>0 
}

* top- and bottom-coding, then CPI
set more off
foreach v of varlist mi_e di_e MI_e DI_e {
	bys ccode year: egen p1_`v'=wpctile(`v'), p(1) weights(weight)
	bys ccode year: egen p99_`v'=wpctile(`v'), p(99) weights(weight)
	replace `v'=p1_`v' if `v'<p1_`v' // bottom-code at p1
	replace `v'=p99_`v' if `v'>p99_`v' & `v'<. // top-code at p99
	replace `v'=`v'/cpi2010 // express in 2010 NCU
}

* Income drops and ginis
cap drop ESI*
egen group=group(wave ccode year), label

set more off
foreach v in mi_e di_e MI_e DI_e {
	xtset pid year, yearly
	egen gini_`v'_2560 = inequal(`v') if age>=25 & age<=60, by(group) weight(weight) index(gini)
	egen gini_`v'_all = inequal(`v'), by(group) weight(weight) index(gini)	
	cap drop d`v'
	gen d1_`v'=d1.`v'/abs(l1.`v') if `v'<. & l1.`v'<.
	label var d1_`v' "Growth in `v'"
	gen d`v'=d1.`v'/(abs(l1.`v'+`v')/2) if `v'<. & l1.`v'<. // arc changes
	label var d`v' "Arc change of `v'"
	gen ESI25_`v'=(d`v'<=-0.25) if d`v'<.
	gen ESI50_`v'=(d`v'<=-0.5) if d`v'<.
	gen ESI10_`v'=(d`v'<=-0.1) if d`v'<.
	gen ESI00_`v'=(d`v'<0) if d`v'<.
	gen upESI25_`v'=(d`v'>=0.25) if d`v'<.
	gen upESI50_`v'=(d`v'>=0.5) if d`v'<.
	gen upESI10_`v'=(d`v'>=0.1) if d`v'<.
	gen upESI00_`v'=(d`v'>0) if d`v'<.

	gen gESI25_`v' =(d1_`v'<=-0.25) if d1_`v'<.
	gen gESI50_`v' =(d1_`v'<=-0.5) if d1_`v'<.
	gen gESI10_`v' =(d1_`v'<=-0.1) if d1_`v'<.
	gen gESI00_`v' =(d1_`v'<0)   if d1_`v'<.
	gen upgESI25_`v'=(d1_`v'>=0.25) if d1_`v'<.
	gen upgESI50_`v'=(d1_`v'>=0.5)  if d1_`v'<.
	gen upgESI10_`v'=(d1_`v'>=0.1)  if d1_`v'<.
	gen upgESI00_`v'=(d1_`v'>0)   if d1_`v'<.
	
	if "`v'"=="mi_e" local foo "HH-level market income (by hand)"
	if "`v'"=="MI_e" local foo "HH-level market income (EU-SILC's hy010)"
	if "`v'"=="di_e" local foo "HH-level disposable income (by hand)"
	if "`v'"=="DI_e" local foo "HH-level disposable income (EU-SILC's hy023)"
	label var gini_`v'_2560 "Gini of `foo', ages 25-60"
	label var gini_`v'_all "Gini of `foo', ages all"
	foreach j in 00 10 25 50 {
		label var ESI`j'_`v' "`j'+% arc drop, `foo'"
		label var upESI`j'_`v' "`j'+% arc gain, `foo'"
		label var gESI`j'_`v' "`j'+% drop, `foo'"
		label var upgESI`j'_`v' "`j'+% gain, `foo'"
	}
}

note: Created on `= c(current_date)'
compress
save C:\Users\rehm.16\Desktop\tmp\silc_dec2017\eu_silc_arc4.dta, replace

// Joint incidence of MI + DI drops
foreach j in 00 10 25 50 {
	egen compESI`j'_MIDI=group(ESI`j'_MI_e ESI`j'_DI_e), label
	egen compESI`j'_midi=group(ESI`j'_mi_e ESI`j'_di_e), label
	label var compESI`j'_MIDI "group(ESI`j'_MI_e ESI`j'_DI_e)"
	label var compESI`j'_midi "group(ESI`j'_mi_e ESI`j'_di_e)"
	egen compgESI`j'_MIDI=group(gESI`j'_MI_e gESI`j'_DI_e), label
	egen compgESI`j'_midi=group(gESI`j'_mi_e gESI`j'_di_e), label
	label var compgESI`j'_MIDI "group(gESI`j'_MI_e gESI`j'_DI_e)"
	label var compgESI`j'_midi "group(gESI`j'_mi_e gESI`j'_di_e)"
}
label list compESI25_MIDI

foreach j in 00 10 25 50 {
	foreach m in 1 2 3 4 {
		gen RR`j'_MIDI`m'=(compESI`j'_MIDI==`m') if compESI`j'_MIDI<.
		gen RR`j'_midi`m'=(compESI`j'_midi==`m') if compESI`j'_midi<.
		gen RR`j'_gMIDI`m'=(compgESI`j'_MIDI==`m') if compgESI`j'_MIDI<.
		gen RR`j'_gmidi`m'=(compgESI`j'_midi==`m') if compgESI`j'_midi<.
	}
	label var RR`j'_MIDI1 "MI+DI drops (ESI`j'_MI_e=0 + ESI`j'_DI_e==0)"
	label var RR`j'_midi1 "MI+DI drops (ESI`j'_mi_e=0 + ESI`j'_di_e==0)"
	label var RR`j'_MIDI2 "MI+DI drops (ESI`j'_MI_e=0 + ESI`j'_DI_e==1)"
	label var RR`j'_midi2 "MI+DI drops (ESI`j'_mi_e=0 + ESI`j'_di_e==1)"
	label var RR`j'_MIDI3 "MI+DI drops (ESI`j'_MI_e=1 + ESI`j'_DI_e==0)"
	label var RR`j'_midi3 "MI+DI drops (ESI`j'_mi_e=1 + ESI`j'_di_e==0)"
	label var RR`j'_MIDI4 "MI+DI drops (ESI`j'_MI_e=1 + ESI`j'_DI_e==1)"
	label var RR`j'_midi4 "MI+DI drops (ESI`j'_mi_e=1 + ESI`j'_di_e==1)"
	label var RR`j'_gMIDI1 "MI+DI drops (gESI`j'_MI_e=0 + gESI`j'_DI_e==0)"
	label var RR`j'_gmidi1 "MI+DI drops (gESI`j'_mi_e=0 + gESI`j'_di_e==0)"
	label var RR`j'_gMIDI2 "MI+DI drops (gESI`j'_MI_e=0 + gESI`j'_DI_e==1)"
	label var RR`j'_gmidi2 "MI+DI drops (gESI`j'_mi_e=0 + gESI`j'_di_e==1)"
	label var RR`j'_gMIDI3 "MI+DI drops (gESI`j'_MI_e=1 + gESI`j'_DI_e==0)"
	label var RR`j'_gmidi3 "MI+DI drops (gESI`j'_mi_e=1 + gESI`j'_di_e==0)"
	label var RR`j'_gMIDI4 "MI+DI drops (gESI`j'_MI_e=1 + gESI`j'_DI_e==1)"
	label var RR`j'_gmidi4 "MI+DI drops (gESI`j'_mi_e=1 + gESI`j'_di_e==1)"
}

foreach v in mi_e di_e MI_e DI_e {
	clonevar my_`v'=`v'
	clonevar sd_`v'=`v'
	label var my_`v' "`: var label `v'' (mean)"
	label var sd_`v' "`: var label `v'' (SD)"
}

* collapse for ESIs
preserve
	* all ages
	foreach j in 00 10 25 50 {
		foreach v in mi_e di_e MI_e DI_e {
			gen d`j'_`v'=d`v' if ESI`j'_`v'==1
			label var d`j'_`v' "Median drop size if ESI`j'_`v'==1"
			gen d1_`j'_`v'=d`v' if gESI`j'_`v'==1
			label var d1_`j'_`v' "Median drop size if gESI`j'_`v'==1"
		}
	}
	foreach v of varlist _all {
		local l`v': var label `v'
	}
	collapse (sd) sd_* (mean) my_* gin*_* *ESI??_* RR??_* (median) d??_*e d1_*e [aw=weight], by(ccode year)
	foreach v of varlist _all {
		label var `v' "`l`v''"
	}
	gen dataset="SILC"
	isid ccode year
	compress
	gen ages="all"
	compress
	note: ./rawdata/silc/yale_eu-silc_july2019.do
	note: Created on `= c(current_date)'
	save ./rawdata/silc/ESIs_silc.dta, replace
restore

preserve
	* ages 25-60
	keep if age>=25 & age<=60
	foreach j in 00 10 25 50 {
		foreach v in mi_e di_e MI_e DI_e {
			gen d`j'_`v'=d`v' if ESI`j'_`v'==1
			label var d`j'_`v' "Median drop size if ESI`j'_`v'==1"
			gen d1_`j'_`v'=d`v' if gESI`j'_`v'==1
			label var d1_`j'_`v' "Median drop size if gESI`j'_`v'==1"
		}
	}

	foreach v of varlist _all {
		local l`v': var label `v'
	}
	collapse (sd) sd_* (mean) my_* gin*_* *ESI??_* RR??_* (median) d??_*e d1_*e [aw=weight], by(ccode year)
	foreach v of varlist _all {
		label var `v' "`l`v''"
	}
	gen dataset="SILC"
	isid ccode year
	compress
	gen ages="25-60"
	compress
	note: ./rawdata/silc/yale_eu-silc_july2019.do
	note: Created on `= c(current_date)'
	save ./rawdata/silc/ESIs_ages25-60_silc.dta, replace
restore

display "$S_TIME  $S_DATE"
cap log close
